##Goals
Load in phyloseq data with rooted tree. Evaluate sequencing depth and remove sample. Normalize the read counts between samples. Calculate community dissimilarities. Numbers between 0 and 1. If 0, completely similar versus if they are 1, then they’re completely dissimilar. Sorensen: Shared Species as a binary value: Abundance-unweighted Bray-Curtis: Shared Abundant species: Abundance-weighted (Abundance-)Weighted UNIFRAC: Consider Abundant Species and where they fall on the tree Visualize the community data with two unconstrained Ordinations: PCoA: Linear Method. Eigenvalue = how much variation is explained by each axis. Choose to view axis 1, 2, 3, etc. and plot them together. NMDS: Non-linear. Smush multiple Dimensions into 2 or 3 axes. Need to report Stress value (ideally <0.15). Run statistics with PERMANOVA and betadispR.
##Set seed
set.seed(2443)
##Load libraries
#install.packages("vegan")
pacman::p_load(tidyverse, devtools, phyloseq, patchwork, vegan,
install = FALSE)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Load Lakestie colors
lakesite_colors <- c("110" = "dodgerblue4","M15" = "dodgerblue2","M45" = "#D9CC3C","MLB" = "#A0E0BA")
##Load Data
# Load in rooted phylogenetic tree!
load("data/03_Phylogenetic_Tree/phytree_preprocessed_physeq.RData")
##Explore Read Counts
# Calculate the total number of reads per sample.
raw_TotalSeqs_df_combined <-
midroot_physeq %>%
# calculate the sample read sums
sample_sums() %>%
data.frame()
# name the column
colnames(raw_TotalSeqs_df_combined)[1] <- "TotalSeqs"
raw_TotalSeqs_df_DNA <-
midroot_physeq_DNA %>%
# calculate the sample read sums
sample_sums() %>%
data.frame()
# name the column
colnames(raw_TotalSeqs_df_DNA)[1] <- "TotalSeqs"
raw_TotalSeqs_df_RNA <-
midroot_physeq_RNA %>%
# calculate the sample read sums
sample_sums() %>%
data.frame()
# name the column
colnames(raw_TotalSeqs_df_RNA)[1] <- "TotalSeqs"
head(raw_TotalSeqs_df_combined)
## TotalSeqs
## 110D2W115D 13235
## 110D2W115R 12953
## 110D2W415D 14765
## 110D2W415R 11587
## 110D2W515D 18867
## 110D2W515R 12432
head(raw_TotalSeqs_df_DNA)
## TotalSeqs
## 110D2W115D 13235
## 110D2W415D 14765
## 110D2W515D 18867
## 110D2W615D 9289
## 110D2W715D 14757
## 110D2W815D 36720
head(raw_TotalSeqs_df_RNA)
## TotalSeqs
## 110D2W115R 12953
## 110D2W415R 11587
## 110D2W515R 12432
## 110D2W615R 16887
## 110D2W715R 14711
## 110D2W815R 10574
# make histogram of raw reads
raw_TotalSeqs_df_combined %>%
ggplot(aes(x = TotalSeqs)) +
geom_histogram(bins = 50) +
scale_x_continuous(limits = c(0, 10000)) +
labs(title = "Raw Combined Sequencing Depth Distribution") +
theme_classic()
## Warning: Removed 74 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
raw_TotalSeqs_df_DNA %>%
ggplot(aes(x = TotalSeqs)) +
geom_histogram(bins = 50) +
scale_x_continuous(limits = c(0, 10000)) +
labs(title = "Raw DNA Sequencing Depth Distribution") +
theme_classic()
## Warning: Removed 43 rows containing non-finite outside the scale range (`stat_bin()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
raw_TotalSeqs_df_RNA %>%
ggplot(aes(x = TotalSeqs)) +
geom_histogram(bins = 50) +
scale_x_continuous(limits = c(0, 10000)) +
labs(title = "Raw RNA Sequencing Depth Distribution") +
theme_classic()
## Warning: Removed 31 rows containing non-finite outside the scale range (`stat_bin()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
#What are the minimum number of sequences for each rooted physeq object?
midroot_physeq %>% sample_sums() %>% min()
## [1] 2142
midroot_physeq_DNA %>% sample_sums() %>% min()
## [1] 2582
midroot_physeq_RNA %>% sample_sums() %>% min()
## [1] 2142
#The minimum number of sequences for DNA samples = 2582 and for RNA samples = 2142
#In order to normailze the read counts we first need to import a scale read function from http://deneflab.github.io/MicrobeMiseq/
matround <- function(x){trunc(x+0.5)}
scale_reads <- function(physeq, n = min(sample_sums(physeq)), round = "round") {
# transform counts to n
physeq.scale <- transform_sample_counts(physeq, function(x) {(n * x/sum(x))})
# Pick the rounding functions
if (round == "floor"){
otu_table(physeq.scale) <- floor(otu_table(physeq.scale))
} else if (round == "round"){
otu_table(physeq.scale) <- round(otu_table(physeq.scale))
} else if (round == "matround"){
otu_table(physeq.scale) <- matround(otu_table(physeq.scale))
}
# Prune taxa and return new phyloseq object
physeq.scale <- prune_taxa(taxa_sums(physeq.scale) > 0, physeq.scale)
return(physeq.scale)
}
#Scale the reads using the above function, not we are scaling each object by their minimum number for sequences.
# Scale reads combined DNA/RNA by the above function
scaled_rooted_physeq <-
midroot_physeq %>%
scale_reads(round = "matround")
# Scale reads DNA samples by the above function
scaled_rooted_physeq_DNA <-
midroot_physeq_DNA %>%
scale_reads(round = "matround")
# Scale reads RNA samples by the above function
scaled_rooted_physeq_RNA <-
midroot_physeq_RNA %>%
scale_reads(round = "matround")
# Calculate combined DNA//RNA read depth
scaled_TotalSeqs_df <-
scaled_rooted_physeq %>%
sample_sums() %>%
data.frame()
colnames(scaled_TotalSeqs_df)[1] <-"TotalSeqs"
head(scaled_TotalSeqs_df) #Inspect to see if the scaling occured
## TotalSeqs
## 110D2W115D 2143
## 110D2W115R 2130
## 110D2W415D 2137
## 110D2W415R 2140
## 110D2W515D 2146
## 110D2W515R 2147
# Calculate DNA samples read depth
scaled_TotalSeqs_DNA_df <-
scaled_rooted_physeq_DNA %>%
sample_sums() %>%
data.frame()
colnames(scaled_TotalSeqs_DNA_df)[1] <-"TotalSeqs"
head(scaled_TotalSeqs_DNA_df) #Inspect to see if the scaling occured
## TotalSeqs
## 110D2W115D 2581
## 110D2W415D 2583
## 110D2W515D 2584
## 110D2W615D 2586
## 110D2W715D 2583
## 110D2W815D 2568
# Calculate RNA samples read depth
scaled_TotalSeqs_RNA_df <-
scaled_rooted_physeq_RNA %>%
sample_sums() %>%
data.frame()
colnames(scaled_TotalSeqs_RNA_df)[1] <-"TotalSeqs"
# Inspect
head(scaled_TotalSeqs_RNA_df)
## TotalSeqs
## 110D2W115R 2130
## 110D2W415R 2140
## 110D2W515R 2147
## 110D2W615R 2138
## 110D2W715R 2142
## 110D2W815R 2135
#Check the range of the data
#For Combined,DNA and RNA
min_seqs_combined <- min(scaled_TotalSeqs_df$TotalSeqs);min_seqs_combined
## [1] 2117
min_seqs_DNA <- min(scaled_TotalSeqs_DNA_df$TotalSeqs); min_seqs_DNA
## [1] 2568
min_seqs_RNA <- min(scaled_TotalSeqs_RNA_df$TotalSeqs); min_seqs_RNA
## [1] 2129
max_seqs_combined <- max(scaled_TotalSeqs_df$TotalSeqs);max_seqs_combined
## [1] 2173
max_seqs_DNA <- max(scaled_TotalSeqs_DNA_df$TotalSeqs); max_seqs_DNA
## [1] 2595
max_seqs_RNA <- max(scaled_TotalSeqs_RNA_df$TotalSeqs); max_seqs_RNA
## [1] 2173
#Range
max_seqs_combined - min_seqs_combined
## [1] 56
max_seqs_DNA - min_seqs_DNA
## [1] 27
max_seqs_RNA - min_seqs_RNA
## [1] 44
# Plot Histogram
scaled_TotalSeqs_df %>%
ggplot(aes(x = TotalSeqs)) +
geom_histogram(bins = 50) +
scale_x_continuous(limits = c(0, 10000)) +
labs(title = "Scaled Sequencing Depth at 2142") +
theme_classic()
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
scaled_TotalSeqs_DNA_df %>%
ggplot(aes(x = TotalSeqs)) +
geom_histogram(bins = 50) +
scale_x_continuous(limits = c(0, 10000)) +
labs(title = "Scaled Sequencing Depth of DNA only samples at 2582") +
theme_classic()
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
scaled_TotalSeqs_RNA_df %>%
ggplot(aes(x = TotalSeqs)) +
geom_histogram(bins = 50) +
scale_x_continuous(limits = c(0, 10000)) +
labs(title = "Scaled Sequencing Depth of RNA only samples at 2142") +
theme_classic()
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
#We can now start running PCoA with respect to Soreson and Bray-Curtis
# Calculate sorensen dissimilarity: Abundance-unweighted of shared taxa
scaled_soren_pcoa_DNA <-
ordinate(
physeq = scaled_rooted_physeq_DNA,
method = "PCoA",
distance = "bray", binary = TRUE)
#str(scaled_soren_pcoa_DNA)
# Plot the ordination
soren_limnion_pcoa_DNA <- plot_ordination(
physeq = scaled_rooted_physeq_DNA,
ordination = scaled_soren_pcoa_DNA,
color = "lakesite",
shape = "limnion",
title = "Sorensen PCoA DNA") +
geom_point(size=5, alpha = 0.5, aes(color = lakesite)) +
scale_shape_manual(values = c(15,16,17,18, 19, 20)) +
scale_color_manual(values = lakesite_colors) +
theme_bw()
# Show the plot
soren_limnion_pcoa_DNA
# Calculate sorensen dissimilarity: Abundance-unweighted of shared taxa
scaled_soren_pcoa_RNA <-
ordinate(
physeq = scaled_rooted_physeq_RNA,
method = "PCoA",
distance = "bray", binary = TRUE)
#str(scaled_soren_pcoa_DNA)
# Plot the ordination
soren_limnion_pcoa_RNA <- plot_ordination(
physeq = scaled_rooted_physeq_RNA,
ordination = scaled_soren_pcoa_RNA,
color = "lakesite",
shape = "limnion",
title = "Sorensen PCoA RNA") +
geom_point(size=5, alpha = 0.5, aes(color = lakesite)) +
scale_shape_manual(values = c(15,16,17,18, 19, 20)) +
scale_color_manual(values = lakesite_colors) +
theme_bw()
# Show the plot
soren_limnion_pcoa_RNA
#Running PCoA with season as the shape
# Plot the ordination
soren_season_pcoa_DNA <- plot_ordination(
physeq = scaled_rooted_physeq_DNA,
ordination = scaled_soren_pcoa_DNA,
color = "lakesite",
shape = "season",
title = "Sorensen PCoA DNA") +
geom_point(size=5, alpha = 0.5, aes(color = lakesite)) +
scale_shape_manual(values = c(15,16,17,18, 19, 20)) +
scale_color_manual(values = lakesite_colors) +
theme_bw()
# Show the plot
soren_season_pcoa_DNA
# Plot the ordination
soren_season_pcoa_RNA <- plot_ordination(
physeq = scaled_rooted_physeq_RNA,
ordination = scaled_soren_pcoa_RNA,
color = "lakesite",
shape = "season",
title = "Sorensen PCoA RNA") +
geom_point(size=5, alpha = 0.5, aes(color = lakesite)) +
scale_shape_manual(values = c(15,16,17,18, 19, 20)) +
scale_color_manual(values = lakesite_colors) +
theme_bw()
# Show the plot
soren_season_pcoa_RNA
#Comeback to combine plots
# Calculate the BC distance
scaled_BC_pcoa_DNA <-
ordinate(
physeq = scaled_rooted_physeq_DNA,
method = "PCoA",
distance = "bray")
scaled_BC_pcoa_RNA <-
ordinate(
physeq = scaled_rooted_physeq_RNA,
method = "PCoA",
distance = "bray")
# Plot the PCoA
bray_pcoa_limnion_DNA <-
plot_ordination(
physeq = scaled_rooted_physeq_DNA,
ordination = scaled_BC_pcoa_DNA,
color = "lakesite",
shape = "limnion",
title = "Bray-Curtis PCoA DNA Limnion") +
geom_point(size=5, alpha = 0.5, aes(color = lakesite)) +
scale_shape_manual(values = c(15,16,17,18, 19, 20,21)) +
scale_color_manual(values = c(lakesite_colors)) +
theme_bw()
bray_pcoa_limnion_RNA <-
plot_ordination(
physeq = scaled_rooted_physeq_RNA,
ordination = scaled_BC_pcoa_RNA,
color = "lakesite",
shape = "limnion",
title = "Bray-Curtis PCoA RNA Limnion") +
geom_point(size=5, alpha = 0.5, aes(color = lakesite)) +
scale_shape_manual(values = c(15,16,17,18, 19, 20,21)) +
scale_color_manual(values = c(lakesite_colors)) +
theme_bw()
bray_pcoa_season_DNA <-
plot_ordination(
physeq = scaled_rooted_physeq_DNA,
ordination = scaled_BC_pcoa_DNA,
color = "lakesite",
shape= "season",
title = "Bray-Curtis PCoA DNA Season") +
geom_point(size=5, alpha = 0.5, aes(color = lakesite)) +
scale_shape_manual(values = c(15,16,17,18, 19, 20,21)) +
scale_color_manual(values = c(lakesite_colors)) +
theme_bw()
bray_pcoa_season_RNA <-
plot_ordination(
physeq = scaled_rooted_physeq_RNA,
ordination = scaled_BC_pcoa_RNA,
color = "lakesite",
shape= "season",
title = "Bray-Curtis PCoA RNA Season") +
geom_point(size=5, alpha = 0.5, aes(color = lakesite)) +
scale_shape_manual(values = c(15,16,17,18, 19, 20,21)) +
scale_color_manual(values = c(lakesite_colors)) +
theme_bw()
grid.arrange(bray_pcoa_limnion_DNA,
bray_pcoa_limnion_RNA,
bray_pcoa_season_DNA,
bray_pcoa_season_RNA, ncol=4)
scaled_wUNI_pcoa_combined <-
ordinate(
physeq = scaled_rooted_physeq,
method = "PCoA",
distance = "wunifrac")
scaled_wUNI_pcoa_DNA <-
ordinate(
physeq = scaled_rooted_physeq_DNA,
method = "PCoA",
distance = "wunifrac")
scaled_wUNI_pcoa_RNA <-
ordinate(
physeq = scaled_rooted_physeq_RNA,
method = "PCoA",
distance = "wunifrac")
# Plot the PCoA
wUNI_station_pcoa_combined_season <-
plot_ordination(
physeq = scaled_rooted_physeq,
ordination = scaled_wUNI_pcoa_combined,
color = "lakesite",
shape = "season",
title = "Weighted Unifrac PCoA Combined season") +
geom_point(size=5, alpha = 0.5, aes(color = lakesite)) +
scale_shape_manual(values = c(15,16,17,18, 19, 20)) +
scale_color_manual(values = c(lakesite_colors)) +
theme_bw()
wUNI_station_pcoa_combined_limnion <-
plot_ordination(
physeq = scaled_rooted_physeq,
ordination = scaled_wUNI_pcoa_combined,
color = "lakesite",
shape = "limnion",
title = "Weighted Unifrac PCoA Combined limnion") +
geom_point(size=5, alpha = 0.5, aes(color = lakesite)) +
scale_shape_manual(values = c(15,16,17,18, 19, 20)) +
scale_color_manual(values = c(lakesite_colors)) +
theme_bw()
wUNI_station_pcoa_DNA_season <-
plot_ordination(
physeq = scaled_rooted_physeq_DNA,
ordination = scaled_wUNI_pcoa_DNA,
color = "lakesite",
shape = "season",
title = "Weighted Unifrac PCoA DNA season") +
geom_point(size=5, alpha = 0.5, aes(color = lakesite)) +
scale_shape_manual(values = c(15,16,17,18, 19, 20)) +
scale_color_manual(values = c(lakesite_colors)) +
theme_bw()
wUNI_station_pcoa_DNA_limnion <-
plot_ordination(
physeq = scaled_rooted_physeq_DNA,
ordination = scaled_wUNI_pcoa_DNA,
color = "lakesite",
shape = "limnion",
title = "Weighted Unifrac PCoA DNA limnion") +
geom_point(size=5, alpha = 0.5, aes(color = lakesite)) +
scale_shape_manual(values = c(15,16,17,18, 19, 20)) +
scale_color_manual(values = c(lakesite_colors)) +
theme_bw()
wUNI_station_pcoa_RNA_season <-
plot_ordination(
physeq = scaled_rooted_physeq_RNA,
ordination = scaled_wUNI_pcoa_RNA,
color = "lakesite",
shape = "season",
title = "Weighted Unifrac PCoA RNA season") +
geom_point(size=5, alpha = 0.5, aes(color = lakesite)) +
scale_shape_manual(values = c(15,16,17,18, 19, 20)) +
scale_color_manual(values = c(lakesite_colors)) +
theme_bw()
wUNI_station_pcoa_RNA_limnion <-
plot_ordination(
physeq = scaled_rooted_physeq_RNA,
ordination = scaled_wUNI_pcoa_RNA,
color = "lakesite",
shape = "limnion",
title = "Weighted Unifrac PCoA RNA limnion") +
geom_point(size=5, alpha = 0.5, aes(color = lakesite)) +
scale_shape_manual(values = c(15,16,17,18, 19, 20)) +
scale_color_manual(values = c(lakesite_colors)) +
theme_bw()
wUNI_station_pcoa_combined_season
wUNI_station_pcoa_combined_limnion
wUNI_station_pcoa_DNA_season
wUNI_station_pcoa_DNA_limnion
wUNI_station_pcoa_RNA_season
wUNI_station_pcoa_RNA_limnion
# Calculate the Weighted Unifrac distance
scaled_wUNI_nmds_combined <-
ordinate(
physeq = scaled_rooted_physeq,
method = "NMDS",
distance = "wunifrac")
## Run 0 stress 0.1418455
## Run 1 stress 0.14491
## Run 2 stress 0.1615657
## Run 3 stress 0.1562387
## Run 4 stress 0.166838
## Run 5 stress 0.1619733
## Run 6 stress 0.1494274
## Run 7 stress 0.1477715
## Run 8 stress 0.1550695
## Run 9 stress 0.147955
## Run 10 stress 0.1542195
## Run 11 stress 0.1449249
## Run 12 stress 0.1542195
## Run 13 stress 0.1668388
## Run 14 stress 0.1449098
## Run 15 stress 0.1418455
## ... New best solution
## ... Procrustes: rmse 1.651949e-05 max resid 0.0001469256
## ... Similar to previous best
## Run 16 stress 0.1634479
## Run 17 stress 0.1604705
## Run 18 stress 0.147952
## Run 19 stress 0.1600244
## Run 20 stress 0.1418454
## ... New best solution
## ... Procrustes: rmse 3.563792e-05 max resid 0.0003311476
## ... Similar to previous best
## *** Best solution repeated 1 times
scaled_wUNI_nmds_DNA <-
ordinate(
physeq = scaled_rooted_physeq_DNA,
method = "NMDS",
distance = "wunifrac")
## Run 0 stress 0.1925474
## Run 1 stress 0.1985646
## Run 2 stress 0.2141103
## Run 3 stress 0.1985623
## Run 4 stress 0.1925474
## ... Procrustes: rmse 4.622689e-06 max resid 1.70631e-05
## ... Similar to previous best
## Run 5 stress 0.2515052
## Run 6 stress 0.1925474
## ... Procrustes: rmse 1.281947e-05 max resid 8.138397e-05
## ... Similar to previous best
## Run 7 stress 0.1926273
## ... Procrustes: rmse 0.004161513 max resid 0.02407351
## Run 8 stress 0.1925474
## ... Procrustes: rmse 3.626435e-05 max resid 0.0002383358
## ... Similar to previous best
## Run 9 stress 0.2102099
## Run 10 stress 0.1926274
## ... Procrustes: rmse 0.004159529 max resid 0.02406611
## Run 11 stress 0.198565
## Run 12 stress 0.1985652
## Run 13 stress 0.2101843
## Run 14 stress 0.1926273
## ... Procrustes: rmse 0.004166649 max resid 0.02412437
## Run 15 stress 0.1925474
## ... Procrustes: rmse 1.676562e-05 max resid 0.0001045908
## ... Similar to previous best
## Run 16 stress 0.1985644
## Run 17 stress 0.1985654
## Run 18 stress 0.1925474
## ... Procrustes: rmse 6.392559e-06 max resid 2.872485e-05
## ... Similar to previous best
## Run 19 stress 0.2451172
## Run 20 stress 0.1925474
## ... Procrustes: rmse 7.540351e-06 max resid 4.720985e-05
## ... Similar to previous best
## *** Best solution repeated 6 times
scaled_wUNI_nmds_RNA <-
ordinate(
physeq = scaled_rooted_physeq_RNA,
method = "NMDS",
distance = "wunifrac")
## Run 0 stress 0.1645868
## Run 1 stress 0.2660364
## Run 2 stress 0.1645868
## ... New best solution
## ... Procrustes: rmse 4.067466e-06 max resid 2.252153e-05
## ... Similar to previous best
## Run 3 stress 0.1645868
## ... Procrustes: rmse 4.083904e-06 max resid 1.905353e-05
## ... Similar to previous best
## Run 4 stress 0.1645868
## ... Procrustes: rmse 2.722264e-06 max resid 1.323778e-05
## ... Similar to previous best
## Run 5 stress 0.1645868
## ... Procrustes: rmse 1.98128e-06 max resid 1.034146e-05
## ... Similar to previous best
## Run 6 stress 0.1645868
## ... Procrustes: rmse 2.849117e-06 max resid 1.05118e-05
## ... Similar to previous best
## Run 7 stress 0.1645868
## ... Procrustes: rmse 5.445529e-06 max resid 3.531163e-05
## ... Similar to previous best
## Run 8 stress 0.1645868
## ... Procrustes: rmse 2.938561e-06 max resid 1.758468e-05
## ... Similar to previous best
## Run 9 stress 0.1645868
## ... Procrustes: rmse 5.152021e-06 max resid 2.938458e-05
## ... Similar to previous best
## Run 10 stress 0.1645868
## ... Procrustes: rmse 3.21662e-06 max resid 1.860854e-05
## ... Similar to previous best
## Run 11 stress 0.2444954
## Run 12 stress 0.1645868
## ... Procrustes: rmse 4.164196e-06 max resid 2.150385e-05
## ... Similar to previous best
## Run 13 stress 0.1645868
## ... New best solution
## ... Procrustes: rmse 1.422965e-06 max resid 4.641084e-06
## ... Similar to previous best
## Run 14 stress 0.1645868
## ... Procrustes: rmse 3.82778e-06 max resid 1.75597e-05
## ... Similar to previous best
## Run 15 stress 0.2249074
## Run 16 stress 0.1645868
## ... Procrustes: rmse 3.358032e-06 max resid 2.109151e-05
## ... Similar to previous best
## Run 17 stress 0.1645868
## ... Procrustes: rmse 5.317784e-06 max resid 2.250939e-05
## ... Similar to previous best
## Run 18 stress 0.1645868
## ... Procrustes: rmse 1.990947e-06 max resid 7.963673e-06
## ... Similar to previous best
## Run 19 stress 0.1645868
## ... Procrustes: rmse 7.68143e-06 max resid 4.757098e-05
## ... Similar to previous best
## Run 20 stress 0.1645868
## ... Procrustes: rmse 2.829496e-06 max resid 1.629448e-05
## ... Similar to previous best
## *** Best solution repeated 7 times
# Plot the PCoA
wUNI_station_nmds_combined_season <-
plot_ordination(
physeq = scaled_rooted_physeq,
ordination = scaled_wUNI_nmds_combined,
color = "lakesite",
shape = "season",
title = "Weighted Unifrac NMDS combined season") +
geom_point(size=5, alpha = 0.5, aes(color = lakesite)) +
scale_shape_manual(values = c(15,16,17)) +
scale_color_manual(values = c(lakesite_colors)) +
theme_bw()
wUNI_station_nmds_combined_limnion <-
plot_ordination(
physeq = scaled_rooted_physeq,
ordination = scaled_wUNI_nmds_combined,
color = "lakesite",
shape = "limnion",
title = "Weighted Unifrac NMDS combined limnion") +
geom_point(size=5, alpha = 0.5, aes(color = lakesite)) +
scale_shape_manual(values = c(15,16,17)) +
scale_color_manual(values = c(lakesite_colors)) +
theme_bw()
wUNI_station_nmds_DNA_season <-
plot_ordination(
physeq = scaled_rooted_physeq_DNA,
ordination = scaled_wUNI_nmds_DNA,
color = "lakesite",
shape = "season",
title = "Weighted Unifrac NMDS DNA season") +
geom_point(size=5, alpha = 0.5, aes(color = lakesite)) +
scale_shape_manual(values = c(15,16,17)) +
scale_color_manual(values = c(lakesite_colors)) +
theme_bw()
wUNI_station_nmds_DNA_limnion <-
plot_ordination(
physeq = scaled_rooted_physeq_DNA,
ordination = scaled_wUNI_nmds_DNA,
color = "lakesite",
shape = "limnion",
title = "Weighted Unifrac NMDS DNA limnion") +
geom_point(size=5, alpha = 0.5, aes(color = lakesite)) +
scale_shape_manual(values = c(15,16,17)) +
scale_color_manual(values = c(lakesite_colors)) +
theme_bw()
wUNI_station_nmds_RNA_season <-
plot_ordination(
physeq = scaled_rooted_physeq_RNA,
ordination = scaled_wUNI_nmds_RNA,
color = "lakesite",
shape = "season",
title = "Weighted Unifrac NMDS RNA season") +
geom_point(size=5, alpha = 0.5, aes(color = lakesite)) +
scale_shape_manual(values = c(15,16,17)) +
scale_color_manual(values = c(lakesite_colors)) +
theme_bw()
wUNI_station_nmds_RNA_limnion <-
plot_ordination(
physeq = scaled_rooted_physeq_RNA,
ordination = scaled_wUNI_nmds_RNA,
color = "lakesite",
shape = "limnion",
title = "Weighted Unifrac NMDS RNA limnion") +
geom_point(size=5, alpha = 0.5, aes(color = lakesite)) +
scale_shape_manual(values = c(15,16,17)) +
scale_color_manual(values = c(lakesite_colors)) +
theme_bw()
wUNI_station_nmds_combined_season
wUNI_station_nmds_combined_limnion
wUNI_station_nmds_DNA_season
wUNI_station_nmds_DNA_limnion
wUNI_station_nmds_RNA_season
wUNI_station_nmds_RNA_limnion
##Statistical Significance Testing
# Calculate all three of the distance matrices
scaled_sorensen_dist_combined <- phyloseq::distance(scaled_rooted_physeq, method = "bray", binary = TRUE)
scaled_sorensen_dist_DNA <- phyloseq::distance(scaled_rooted_physeq_DNA, method = "bray", binary = TRUE)
scaled_sorensen_dist_RNA <- phyloseq::distance(scaled_rooted_physeq_RNA, method = "bray", binary = TRUE)
scaled_bray_dist_combined <- phyloseq::distance(scaled_rooted_physeq, method = "bray")
scaled_bray_dist_DNA <- phyloseq::distance(scaled_rooted_physeq_DNA, method = "bray")
scaled_bray_dist_RNA <- phyloseq::distance(scaled_rooted_physeq_RNA, method = "bray")
scaled_wUnifrac_dist_combined <- phyloseq::distance(scaled_rooted_physeq, method = "wunifrac")
scaled_wUnifrac_dist_DNA <- phyloseq::distance(scaled_rooted_physeq_DNA, method = "wunifrac")
scaled_wUnifrac_dist_RNA <- phyloseq::distance(scaled_rooted_physeq_RNA, method = "wunifrac")
# make a data frame from the sample_data
# All distance matrices will be the same metadata because they
# originate from the same phyloseq object.
metadata_combined <- data.frame(sample_data(scaled_rooted_physeq))
metadata_DNA <- data.frame(sample_data(scaled_rooted_physeq_DNA))
metadata_RNA <- data.frame(sample_data(scaled_rooted_physeq_RNA))
# Adonis test
# In this example we are testing the hypothesis that the five stations
# that were collected have different centroids in the ordination space
# for each of the dissimilarity metrics, we are using a discrete variable
adonis2(scaled_sorensen_dist_combined ~ lakesite, data = metadata_combined)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_sorensen_dist_combined ~ lakesite, data = metadata_combined)
## Df SumOfSqs R2 F Pr(>F)
## lakesite 3 5.8697 0.23886 10.983 0.001 ***
## Residual 105 18.7043 0.76114
## Total 108 24.5740 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(scaled_bray_dist_combined ~ lakesite, data = metadata_combined)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_bray_dist_combined ~ lakesite, data = metadata_combined)
## Df SumOfSqs R2 F Pr(>F)
## lakesite 3 4.4486 0.16638 6.9855 0.001 ***
## Residual 105 22.2892 0.83362
## Total 108 26.7379 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(scaled_wUnifrac_dist_combined ~ lakesite, data = metadata_combined)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_wUnifrac_dist_combined ~ lakesite, data = metadata_combined)
## Df SumOfSqs R2 F Pr(>F)
## lakesite 3 0.5782 0.08178 3.1171 0.002 **
## Residual 105 6.4923 0.91822
## Total 108 7.0705 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(scaled_sorensen_dist_DNA ~ lakesite, data = metadata_DNA)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_sorensen_dist_DNA ~ lakesite, data = metadata_DNA)
## Df SumOfSqs R2 F Pr(>F)
## lakesite 3 3.2802 0.32063 8.1804 0.001 ***
## Residual 52 6.9503 0.67937
## Total 55 10.2305 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(scaled_bray_dist_DNA ~ lakesite, data = metadata_DNA)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_bray_dist_DNA ~ lakesite, data = metadata_DNA)
## Df SumOfSqs R2 F Pr(>F)
## lakesite 3 2.5944 0.30663 7.6653 0.001 ***
## Residual 52 5.8667 0.69337
## Total 55 8.4611 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(scaled_wUnifrac_dist_DNA ~ lakesite, data = metadata_DNA)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_wUnifrac_dist_DNA ~ lakesite, data = metadata_DNA)
## Df SumOfSqs R2 F Pr(>F)
## lakesite 3 0.28642 0.17106 3.5768 0.001 ***
## Residual 52 1.38801 0.82894
## Total 55 1.67444 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(scaled_sorensen_dist_RNA ~ lakesite, data = metadata_RNA)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_sorensen_dist_RNA ~ lakesite, data = metadata_RNA)
## Df SumOfSqs R2 F Pr(>F)
## lakesite 3 3.1514 0.25969 5.7294 0.001 ***
## Residual 49 8.9840 0.74031
## Total 52 12.1354 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(scaled_bray_dist_RNA ~ lakesite, data = metadata_RNA)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_bray_dist_RNA ~ lakesite, data = metadata_RNA)
## Df SumOfSqs R2 F Pr(>F)
## lakesite 3 2.7364 0.23186 4.9302 0.001 ***
## Residual 49 9.0656 0.76814
## Total 52 11.8020 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(scaled_wUnifrac_dist_RNA ~ lakesite, data = metadata_RNA)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_wUnifrac_dist_RNA ~ lakesite, data = metadata_RNA)
## Df SumOfSqs R2 F Pr(>F)
## lakesite 3 0.420 0.18049 3.5973 0.001 ***
## Residual 49 1.907 0.81951
## Total 52 2.327 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##BetaDispR
# Homogeneity of Disperson test with beta dispr
# Sorensen
beta_soren_station_combined <- betadisper(scaled_sorensen_dist_combined, metadata_combined$lakesite)
permutest(beta_soren_station_combined)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 3 0.14349 0.047831 5.6975 999 0.004 **
## Residuals 105 0.88148 0.008395
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Bray-curtis
beta_bray_station_combined <- betadisper(scaled_bray_dist_combined, metadata_combined$lakesite)
permutest(beta_bray_station_combined)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 3 0.04498 0.014993 1.7103 999 0.153
## Residuals 105 0.92043 0.008766
# Weighted Unifrac
beta_unifrac_station_combined <- betadisper(scaled_wUnifrac_dist_combined, metadata_combined$lakesite)
permutest(beta_unifrac_station_combined)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 3 0.001152 0.00038413 0.1343 999 0.928
## Residuals 105 0.300301 0.00286001
#Homogeneity of Dispersion test with beta dispr
#Soerson DNA
beta_soren_station_DNA <- betadisper(scaled_sorensen_dist_DNA, metadata_DNA$lakesite)
permutest(beta_soren_station_DNA)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 3 0.06805 0.0226834 2.5481 999 0.056 .
## Residuals 52 0.46291 0.0089021
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Bray-curtis DNA
beta_bray_station_DNA <- betadisper(scaled_bray_dist_DNA, metadata_DNA$lakesite)
permutest(beta_bray_station_DNA)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 3 0.03891 0.0129706 1.6156 999 0.217
## Residuals 52 0.41748 0.0080285
# Weighted Unifrac DNA
beta_bray_unifrac_DNA <- betadisper(scaled_wUnifrac_dist_DNA, metadata_DNA$lakesite)
permutest(beta_bray_unifrac_DNA)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 3 0.007897 0.0026323 1.1012 999 0.364
## Residuals 52 0.124307 0.0023905
#Homogeneity of Dispersion test with beta dispr
#Soerson RNA
beta_soren_station_RNA <- betadisper(scaled_sorensen_dist_RNA, metadata_RNA$lakesite)
permutest(beta_soren_station_RNA)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 3 0.09634 0.032114 3.6571 999 0.02 *
## Residuals 49 0.43028 0.008781
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Bray-curtis RNA
beta_bray_station_RNA <- betadisper(scaled_bray_dist_RNA, metadata_RNA$lakesite)
permutest(beta_bray_station_RNA)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 3 0.07377 0.024588 2.3909 999 0.096 .
## Residuals 49 0.50393 0.010284
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Weighted Unifrac rNA
beta_bray_unifrac_RNA <- betadisper(scaled_wUnifrac_dist_RNA, metadata_RNA$lakesite)
permutest(beta_bray_unifrac_RNA)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 3 0.002733 0.00091112 0.2955 999 0.835
## Residuals 49 0.151078 0.00308323
#Phylum
# Set the phylum colors
phylum_colors <- c(
Acidobacteriota = "navy",
Actinobacteriota = "darkslategray2",
Armatimonadota = "deeppink1",
Alphaproteobacteria = "plum2",
Bacteroidota = "gold",
Betaproteobacteria = "plum1",
Bdellovibrionota = "red1",
Chloroflexi="black",
Crenarchaeota = "firebrick",
Cyanobacteria = "limegreen",
Deltaproteobacteria = "grey",
Desulfobacterota="magenta",
Firmicutes = "#3E9B96",
Gammaproteobacteria = "greenyellow",
"Marinimicrobia (SAR406 clade)" = "yellow",
Myxococcota = "#B5D6AA",
Nitrospirota = "palevioletred1",
Proteobacteria = "royalblue",
Planctomycetota = "darkorange",
"SAR324 clade(Marine group B)" = "olivedrab",
#Proteobacteria_unclassified = "greenyellow",
Thermoplasmatota = "green",
Verrucomicrobiota = "darkorchid1")
# Other = "grey")
# Calculate the phylum relative abundance
# Make different dfs for combined, DNA and RNA samples
phylum_df_combined <-
scaled_rooted_physeq %>%
# agglomerate at the phylum level
tax_glom(taxrank = "Phylum") %>%
# Transform counts to relative abundance
transform_sample_counts(function (x) {x/sum(x)}) %>%
# Melt to a long format
psmelt() %>%
# Filter out Phyla that are <1% - get rid of low abundant Phyla
dplyr::filter(Abundance > 0.01)
phylum_df_DNA <-
scaled_rooted_physeq_DNA %>%
# agglomerate at the phylum level
tax_glom(taxrank = "Phylum") %>%
# Transform counts to relative abundance
transform_sample_counts(function (x) {x/sum(x)}) %>%
# Melt to a long format
psmelt() %>%
# Filter out Phyla that are <1% - get rid of low abundant Phyla
dplyr::filter(Abundance > 0.01)
phylum_df_RNA <-
scaled_rooted_physeq_RNA %>%
# agglomerate at the phylum level
tax_glom(taxrank = "Phylum") %>%
# Transform counts to relative abundance
transform_sample_counts(function (x) {x/sum(x)}) %>%
# Melt to a long format
psmelt() %>%
# Filter out Phyla that are <1% - get rid of low abundant Phyla
dplyr::filter(Abundance > 0.01)
#Combined Phyla Plots
# Stacked Bar Plot With All phyla
# Plot Phylum Abundances - make sure to load phylum_colors
phylum_df_combined %>%
# Warning: It's important to have one sample per x value,
# otherwise, it will take the sum between multiple samples
dplyr::filter(limnion == "Bottom") %>%
ggplot(aes(x = lakesite, y = Abundance, fill = Phylum)) +
facet_grid(Phylum~season, scale = "free") +
geom_bar(stat = "identity", color = "black") +
labs(title = "Bottom Limnion Phylum Composition") +
scale_fill_manual(values = phylum_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))
phylum_df_combined %>%
# Warning: It's important to have one sample per x value,
# otherwise, it will take the sum between multiple samples
dplyr::filter(limnion == "Middle") %>%
ggplot(aes(x = lakesite, y = Abundance, fill = Phylum)) +
facet_grid(Phylum~season, scale = "free") +
geom_bar(stat = "identity", color = "black") +
labs(title = "Middle Limnion Phylum Composition") +
scale_fill_manual(values = phylum_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))
phylum_df_combined %>%
# Warning: It's important to have one sample per x value,
# otherwise, it will take the sum between multiple samples
dplyr::filter(limnion == "Top") %>%
ggplot(aes(x = lakesite, y = Abundance, fill = Phylum)) +
facet_grid(Phylum~season, scale = "free") +
geom_bar(stat = "identity", color = "black") +
labs(title = "Top Limnion Phylum Composition") +
scale_fill_manual(values = phylum_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))
#Phyla plots from DNA samples
phylum_df_DNA %>%
# Warning: It's important to have one sample per x value,
# otherwise, it will take the sum between multiple samples
dplyr::filter(limnion == "Top") %>%
ggplot(aes(x = lakesite, y = Abundance, fill = Phylum)) +
facet_grid(Phylum~season, scale = "free") +
geom_bar(stat = "identity", color = "black") +
labs(title = "Top Limnion Phylum Composition DNA") +
scale_fill_manual(values = phylum_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))
phylum_df_DNA %>%
# Warning: It's important to have one sample per x value,
# otherwise, it will take the sum between multiple samples
dplyr::filter(limnion == "Middle") %>%
ggplot(aes(x = lakesite, y = Abundance, fill = Phylum)) +
facet_grid(Phylum~season, scale = "free") +
geom_bar(stat = "identity", color = "black") +
labs(title = "Middle Limnion Phylum Composition DNA") +
scale_fill_manual(values = phylum_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))
phylum_df_DNA %>%
# Warning: It's important to have one sample per x value,
# otherwise, it will take the sum between multiple samples
dplyr::filter(limnion == "Bottom") %>%
ggplot(aes(x = lakesite, y = Abundance, fill = Phylum)) +
facet_grid(Phylum~season, scale = "free") +
geom_bar(stat = "identity", color = "black") +
labs(title = "Bottom Limnion Phylum Composition DNA") +
scale_fill_manual(values = phylum_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))
#Phyla from RNA samples
phylum_df_RNA <-
scaled_rooted_physeq_RNA %>%
# agglomerate at the phylum level
tax_glom(taxrank = "Phylum") %>%
# Transform counts to relative abundance
transform_sample_counts(function (x) {x/sum(x)}) %>%
# Melt to a long format
psmelt() %>%
# Filter out Phyla that are <1% - get rid of low abundant Phyla
dplyr::filter(Abundance > 0.01)
phylum_df_RNA %>%
# Warning: It's important to have one sample per x value,
# otherwise, it will take the sum between multiple samples
dplyr::filter(limnion == "Middle") %>%
ggplot(aes(x = lakesite, y = Abundance, fill = Phylum)) +
facet_grid(Phylum~season, scale = "free") +
geom_bar(stat = "identity", color = "black") +
labs(title = "Middle Limnion Phylum Composition RNA") +
scale_fill_manual(values = phylum_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))
phylum_df_RNA %>%
# Warning: It's important to have one sample per x value,
# otherwise, it will take the sum between multiple samples
dplyr::filter(limnion == "Bottom") %>%
ggplot(aes(x = lakesite, y = Abundance, fill = Phylum)) +
facet_grid(Phylum~season, scale = "free") +
geom_bar(stat = "identity", color = "black") +
labs(title = "Bottom Limnion Phylum Composition RNA") +
scale_fill_manual(values = phylum_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))
phylum_df_RNA %>%
# Warning: It's important to have one sample per x value,
# otherwise, it will take the sum between multiple samples
dplyr::filter(limnion == "Top") %>%
ggplot(aes(x = lakesite, y = Abundance, fill = Phylum)) +
facet_grid(Phylum~season, scale = "free") +
geom_bar(stat = "identity", color = "black") +
labs(title = "Top Limnion Phylum Composition RNA") +
scale_fill_manual(values = phylum_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))
#Narrowing in on specific phyla
# Narrow in on a specific group
# Actinobacteriota - y: abundance, x: station, dot plot + boxplot
##Family
# Calculate the Family relative abundance for each sample
# # Make separate dfs for combined, DNA and RNA samples
family_df_combined <-
scaled_rooted_physeq %>%
# agglomerate at the phylum level
tax_glom(taxrank = "Family") %>%
# Transform counts to relative abundance
transform_sample_counts(function (x) {x/sum(x)}) %>%
# Melt to a long format
psmelt() %>%
# Filter out Phyla that are <1% - get rid of low abundant Phyla
dplyr::filter(Abundance > 0.01)
family_df_DNA <-
scaled_rooted_physeq_DNA %>%
# agglomerate at the phylum level
tax_glom(taxrank = "Family") %>%
# Transform counts to relative abundance
transform_sample_counts(function (x) {x/sum(x)}) %>%
# Melt to a long format
psmelt() %>%
# Filter out Phyla that are <1% - get rid of low abundant Phyla
dplyr::filter(Abundance > 0.01)
family_df_RNA <-
scaled_rooted_physeq_RNA %>%
# agglomerate at the phylum level
tax_glom(taxrank = "Family") %>%
# Transform counts to relative abundance
transform_sample_counts(function (x) {x/sum(x)}) %>%
# Melt to a long format
psmelt() %>%
# Filter out Phyla that are <1% - get rid of low abundant Phyla
dplyr::filter(Abundance > 0.01)
#Judging from the Phylum plots we identified Cyanobacteria,Actinobacteriota and Verrucomicrobiota as the most interesting Phyla to discet further as they seem to be quite abudant and vary between season and limnion.
# Cyanobacteria Families in DNA sample
# Plot family
cyano_season_DNA <- family_df_DNA %>%
dplyr::filter(Phylum == "Cyanobacteria") %>%
# build the plot
ggplot(aes(x = lakesite, y = Abundance,
fill = lakesite, color = lakesite)) +
facet_grid(Family~season, scale = "free") +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Cyanobacteria Family Abundance") +
scale_color_manual(values = lakesite_colors) +
scale_fill_manual(values = lakesite_colors) +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
legend.position = "bottom")
actin_season_dna <- family_df_DNA %>%
dplyr::filter(Phylum == "Actinobacteriota") %>%
# build the plot
ggplot(aes(x = lakesite, y = Abundance,
fill = lakesite, color = lakesite)) +
facet_grid(Family~season, scale = "free") +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Actinobacteriota Family Abundance") +
scale_color_manual(values = lakesite_colors) +
scale_fill_manual(values = lakesite_colors) +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
legend.position = "bottom")
#actin_dna sanity check
verru_season_dna <- family_df_DNA %>%
dplyr::filter(Phylum == "Verrucomicrobiota") %>%
# build the plot
ggplot(aes(x = lakesite, y = Abundance,
fill = lakesite, color = lakesite)) +
facet_grid(Family~season, scale = "free") +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = " VerrucomicrobiotaFamily Abundance") +
scale_color_manual(values = lakesite_colors) +
scale_fill_manual(values = lakesite_colors) +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
legend.position = "bottom")
#verru_dna sanity check
#Now we will check the DNA families faceting by limnion
# Cyanobacteria Families in DNA sample
# Plot family
cyano_lim_DNA <- family_df_DNA %>%
dplyr::filter(Phylum == "Cyanobacteria") %>%
# build the plot
ggplot(aes(x = lakesite, y = Abundance,
fill = lakesite, color = lakesite)) +
facet_grid(Family~limnion, scale = "free") +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Cyanobacteria Family Abundance") +
scale_color_manual(values = lakesite_colors) +
scale_fill_manual(values = lakesite_colors) +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
legend.position = "bottom")
actin_lim_dna <- family_df_DNA %>%
dplyr::filter(Phylum == "Actinobacteriota") %>%
# build the plot
ggplot(aes(x = lakesite, y = Abundance,
fill = lakesite, color = lakesite)) +
facet_grid(Family~limnion, scale = "free") +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Actinobacteriota Family Abundance") +
scale_color_manual(values = lakesite_colors) +
scale_fill_manual(values = lakesite_colors) +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
legend.position = "bottom")
#actin_dna sanity check
verru_limnion_dna <- family_df_DNA %>%
dplyr::filter(Phylum == "Verrucomicrobiota") %>%
# build the plot
ggplot(aes(x = lakesite, y = Abundance,
fill = lakesite, color = lakesite)) +
facet_grid(Family~limnion, scale = "free") +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = " VerrucomicrobiotaFamily Abundance") +
scale_color_manual(values = lakesite_colors) +
scale_fill_manual(values = lakesite_colors) +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
legend.position = "bottom")
#verru_dna sanity check
#RNA Samples Plot
cyano_season_RNA <- family_df_RNA %>%
dplyr::filter(Phylum == "Cyanobacteria") %>%
# build the plot
ggplot(aes(x = lakesite, y = Abundance,
fill = lakesite, color = lakesite)) +
facet_grid(Family~season, scale = "free") +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Cyanobacteria RNA Family Abundance") +
scale_color_manual(values = lakesite_colors) +
scale_fill_manual(values = lakesite_colors) +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
legend.position = "bottom")
actin_season_rna <- family_df_RNA %>%
dplyr::filter(Phylum == "Actinobacteriota") %>%
# build the plot
ggplot(aes(x = lakesite, y = Abundance,
fill = lakesite, color = lakesite)) +
facet_grid(Family~season, scale = "free") +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Actinobacteriota Family RNA Abundance") +
scale_color_manual(values = lakesite_colors) +
scale_fill_manual(values = lakesite_colors) +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
legend.position = "bottom")
#actin_dna sanity check
verru_season_rna <- family_df_RNA %>%
dplyr::filter(Phylum == "Verrucomicrobiota") %>%
# build the plot
ggplot(aes(x = lakesite, y = Abundance,
fill = lakesite, color = lakesite)) +
facet_grid(Family~season, scale = "free") +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = " Verrucomicrobiota Family RNA Abundance") +
scale_color_manual(values = lakesite_colors) +
scale_fill_manual(values = lakesite_colors) +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
legend.position = "bottom")
#RNA Limnion Samples
cyano_lim_RNA <- family_df_RNA %>%
dplyr::filter(Phylum == "Cyanobacteria") %>%
# build the plot
ggplot(aes(x = lakesite, y = Abundance,
fill = lakesite, color = lakesite)) +
facet_grid(Family~limnion, scale = "free") +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Cyanobacteria RNA Family Abundance") +
scale_color_manual(values = lakesite_colors) +
scale_fill_manual(values = lakesite_colors) +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
legend.position = "bottom")
actin_lim_rna <- family_df_RNA %>%
dplyr::filter(Phylum == "Actinobacteriota") %>%
# build the plot
ggplot(aes(x = lakesite, y = Abundance,
fill = lakesite, color = lakesite)) +
facet_grid(Family~limnion, scale = "free") +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Actinobacteriota Family RNA Abundance") +
scale_color_manual(values = lakesite_colors) +
scale_fill_manual(values = lakesite_colors) +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
legend.position = "bottom")
#actin_dna sanity check
verru_lim_rna <- family_df_RNA %>%
dplyr::filter(Phylum == "Verrucomicrobiota") %>%
# build the plot
ggplot(aes(x = lakesite, y = Abundance,
fill = lakesite, color = lakesite)) +
facet_grid(Family~limnion, scale = "free") +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = " Verrucomicrobiota Family RNA Abundance") +
scale_color_manual(values = lakesite_colors) +
scale_fill_manual(values = lakesite_colors) +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
legend.position = "bottom")
verru_lim_rna
actin_lim_rna
##Genus
# Calculate the genus relative abundance for each sample
# Make separate dfs for combined, DNA and RNA samples
genus_df <-
scaled_rooted_physeq %>%
# agglomerate at the phylum level
tax_glom(taxrank = "Genus") %>%
# Transform counts to relative abundance
transform_sample_counts(function (x) {x/sum(x)}) %>%
# Melt to a long format
psmelt() %>%
# Filter out Phyla that are <1% - get rid of low abundant Phyla
dplyr::filter(Abundance > 0.01)
genus_df_DNA <-
scaled_rooted_physeq_DNA %>%
# agglomerate at the phylum level
tax_glom(taxrank = "Genus") %>%
# Transform counts to relative abundance
transform_sample_counts(function (x) {x/sum(x)}) %>%
# Melt to a long format
psmelt() %>%
# Filter out Phyla that are <1% - get rid of low abundant Phyla
dplyr::filter(Abundance > 0.01)
genus_df_RNA <-
scaled_rooted_physeq_RNA %>%
# agglomerate at the phylum level
tax_glom(taxrank = "Genus") %>%
# Transform counts to relative abundance
transform_sample_counts(function (x) {x/sum(x)}) %>%
# Melt to a long format
psmelt() %>%
# Filter out Phyla that are <1% - get rid of low abundant Phyla
dplyr::filter(Abundance > 0.01)
# Actinobacteriota
# Plot genus
# Cyanobacteria
# Plot genus
##Session Information
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.3.2 (2023-10-31)
## os Rocky Linux 9.0 (Blue Onyx)
## system x86_64, linux-gnu
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2024-04-30
## pandoc 3.1.1 @ /usr/lib/rstudio-server/bin/quarto/bin/tools/ (via rmarkdown)
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date (UTC) lib source
## ade4 1.7-22 2023-02-06 [1] CRAN (R 4.3.2)
## ape 5.7-1 2023-03-13 [2] CRAN (R 4.3.2)
## Biobase 2.62.0 2023-10-24 [2] Bioconductor
## BiocGenerics 0.48.1 2023-11-01 [2] Bioconductor
## biomformat 1.30.0 2023-10-24 [1] Bioconductor
## Biostrings 2.70.1 2023-10-25 [2] Bioconductor
## bitops 1.0-7 2021-04-24 [2] CRAN (R 4.3.2)
## bslib 0.5.1 2023-08-11 [2] CRAN (R 4.3.2)
## cachem 1.0.8 2023-05-01 [2] CRAN (R 4.3.2)
## callr 3.7.3 2022-11-02 [2] CRAN (R 4.3.2)
## cli 3.6.1 2023-03-23 [2] CRAN (R 4.3.2)
## cluster 2.1.4 2022-08-22 [2] CRAN (R 4.3.2)
## codetools 0.2-19 2023-02-01 [2] CRAN (R 4.3.2)
## colorspace 2.1-0 2023-01-23 [2] CRAN (R 4.3.2)
## crayon 1.5.2 2022-09-29 [2] CRAN (R 4.3.2)
## data.table 1.14.8 2023-02-17 [2] CRAN (R 4.3.2)
## devtools * 2.4.4 2022-07-20 [2] CRAN (R 4.2.1)
## digest 0.6.33 2023-07-07 [2] CRAN (R 4.3.2)
## dplyr * 1.1.3 2023-09-03 [2] CRAN (R 4.3.2)
## ellipsis 0.3.2 2021-04-29 [2] CRAN (R 4.3.2)
## evaluate 0.23 2023-11-01 [2] CRAN (R 4.3.2)
## fansi 1.0.5 2023-10-08 [2] CRAN (R 4.3.2)
## farver 2.1.1 2022-07-06 [2] CRAN (R 4.3.2)
## fastmap 1.1.1 2023-02-24 [2] CRAN (R 4.3.2)
## forcats * 1.0.0 2023-01-29 [1] CRAN (R 4.3.2)
## foreach 1.5.2 2022-02-02 [2] CRAN (R 4.3.2)
## fs 1.6.3 2023-07-20 [2] CRAN (R 4.3.2)
## generics 0.1.3 2022-07-05 [2] CRAN (R 4.3.2)
## GenomeInfoDb 1.38.0 2023-10-24 [2] Bioconductor
## GenomeInfoDbData 1.2.11 2023-11-07 [2] Bioconductor
## ggplot2 * 3.5.0 2024-02-23 [2] CRAN (R 4.3.2)
## glue 1.6.2 2022-02-24 [2] CRAN (R 4.3.2)
## gridExtra * 2.3 2017-09-09 [1] CRAN (R 4.3.2)
## gtable 0.3.4 2023-08-21 [2] CRAN (R 4.3.2)
## highr 0.10 2022-12-22 [2] CRAN (R 4.3.2)
## hms 1.1.3 2023-03-21 [1] CRAN (R 4.3.2)
## htmltools 0.5.7 2023-11-03 [2] CRAN (R 4.3.2)
## htmlwidgets 1.6.2 2023-03-17 [2] CRAN (R 4.3.2)
## httpuv 1.6.12 2023-10-23 [2] CRAN (R 4.3.2)
## igraph 1.5.1 2023-08-10 [2] CRAN (R 4.3.2)
## IRanges 2.36.0 2023-10-24 [2] Bioconductor
## iterators 1.0.14 2022-02-05 [2] CRAN (R 4.3.2)
## jquerylib 0.1.4 2021-04-26 [2] CRAN (R 4.3.2)
## jsonlite 1.8.7 2023-06-29 [2] CRAN (R 4.3.2)
## knitr 1.45 2023-10-30 [2] CRAN (R 4.3.2)
## labeling 0.4.3 2023-08-29 [2] CRAN (R 4.3.2)
## later 1.3.1 2023-05-02 [2] CRAN (R 4.3.2)
## lattice * 0.21-9 2023-10-01 [2] CRAN (R 4.3.2)
## lifecycle 1.0.3 2022-10-07 [2] CRAN (R 4.3.2)
## lubridate * 1.9.3 2023-09-27 [1] CRAN (R 4.3.2)
## magrittr 2.0.3 2022-03-30 [2] CRAN (R 4.3.2)
## MASS 7.3-60 2023-05-04 [2] CRAN (R 4.3.2)
## Matrix 1.6-1.1 2023-09-18 [2] CRAN (R 4.3.2)
## memoise 2.0.1 2021-11-26 [2] CRAN (R 4.3.2)
## mgcv 1.9-0 2023-07-11 [2] CRAN (R 4.3.2)
## mime 0.12 2021-09-28 [2] CRAN (R 4.3.2)
## miniUI 0.1.1.1 2018-05-18 [2] CRAN (R 4.3.2)
## multtest 2.58.0 2023-10-24 [1] Bioconductor
## munsell 0.5.0 2018-06-12 [2] CRAN (R 4.3.2)
## nlme 3.1-163 2023-08-09 [2] CRAN (R 4.3.2)
## pacman 0.5.1 2019-03-11 [1] CRAN (R 4.3.2)
## patchwork * 1.2.0.9000 2024-04-28 [1] Github (thomasp85/patchwork@d943757)
## permute * 0.9-7 2022-01-27 [1] CRAN (R 4.3.2)
## phyloseq * 1.46.0 2023-10-24 [1] Bioconductor
## pillar 1.9.0 2023-03-22 [2] CRAN (R 4.3.2)
## pkgbuild 1.4.2 2023-06-26 [2] CRAN (R 4.3.2)
## pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 4.3.2)
## pkgload 1.3.3 2023-09-22 [2] CRAN (R 4.3.2)
## plyr 1.8.9 2023-10-02 [2] CRAN (R 4.3.2)
## prettyunits 1.2.0 2023-09-24 [2] CRAN (R 4.3.2)
## processx 3.8.2 2023-06-30 [2] CRAN (R 4.3.2)
## profvis 0.3.8 2023-05-02 [2] CRAN (R 4.3.2)
## promises 1.2.1 2023-08-10 [2] CRAN (R 4.3.2)
## ps 1.7.5 2023-04-18 [2] CRAN (R 4.3.2)
## purrr * 1.0.2 2023-08-10 [2] CRAN (R 4.3.2)
## R6 2.5.1 2021-08-19 [2] CRAN (R 4.3.2)
## Rcpp 1.0.11 2023-07-06 [2] CRAN (R 4.3.2)
## RCurl 1.98-1.13 2023-11-02 [2] CRAN (R 4.3.2)
## readr * 2.1.5 2024-01-10 [1] CRAN (R 4.3.2)
## remotes 2.4.2.1 2023-07-18 [2] CRAN (R 4.3.2)
## reshape2 1.4.4 2020-04-09 [2] CRAN (R 4.3.2)
## rhdf5 2.46.1 2023-11-29 [1] Bioconductor 3.18 (R 4.3.2)
## rhdf5filters 1.14.1 2023-11-06 [1] Bioconductor
## Rhdf5lib 1.24.2 2024-02-07 [1] Bioconductor 3.18 (R 4.3.2)
## rlang 1.1.2 2023-11-04 [2] CRAN (R 4.3.2)
## rmarkdown 2.25 2023-09-18 [2] CRAN (R 4.3.2)
## rstudioapi 0.15.0 2023-07-07 [2] CRAN (R 4.3.2)
## S4Vectors 0.40.1 2023-10-26 [2] Bioconductor
## sass 0.4.7 2023-07-15 [2] CRAN (R 4.3.2)
## scales 1.3.0 2023-11-28 [2] CRAN (R 4.3.2)
## sessioninfo 1.2.2 2021-12-06 [2] CRAN (R 4.3.2)
## shiny 1.7.5.1 2023-10-14 [2] CRAN (R 4.3.2)
## stringi 1.7.12 2023-01-11 [2] CRAN (R 4.3.2)
## stringr * 1.5.0 2022-12-02 [2] CRAN (R 4.3.2)
## survival 3.5-7 2023-08-14 [2] CRAN (R 4.3.2)
## tibble * 3.2.1 2023-03-20 [2] CRAN (R 4.3.2)
## tidyr * 1.3.0 2023-01-24 [2] CRAN (R 4.3.2)
## tidyselect 1.2.0 2022-10-10 [2] CRAN (R 4.3.2)
## tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.3.2)
## timechange 0.3.0 2024-01-18 [1] CRAN (R 4.3.2)
## tzdb 0.4.0 2023-05-12 [1] CRAN (R 4.3.2)
## urlchecker 1.0.1 2021-11-30 [2] CRAN (R 4.3.2)
## usethis * 2.2.2 2023-07-06 [2] CRAN (R 4.3.2)
## utf8 1.2.4 2023-10-22 [2] CRAN (R 4.3.2)
## vctrs 0.6.4 2023-10-12 [2] CRAN (R 4.3.2)
## vegan * 2.6-4 2022-10-11 [1] CRAN (R 4.3.2)
## withr 2.5.2 2023-10-30 [2] CRAN (R 4.3.2)
## xfun 0.41 2023-11-01 [2] CRAN (R 4.3.2)
## xtable 1.8-4 2019-04-21 [2] CRAN (R 4.3.2)
## XVector 0.42.0 2023-10-24 [2] Bioconductor
## yaml 2.3.7 2023-01-23 [2] CRAN (R 4.3.2)
## zlibbioc 1.48.0 2023-10-24 [2] Bioconductor
##
## [1] /home/dt473/R/x86_64-pc-linux-gnu-library/4.3
## [2] /programs/R-4.3.2/library
##
## ──────────────────────────────────────────────────────────────────────────────